options(stringsAsFactors = F)
library(Seurat)
library(RColorBrewer)
library(ggplot2)
library(cowplot)
library(dplyr)
library(patchwork)
Attaching SeuratObject
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Attaching package: ‘patchwork’
The following object is masked from ‘package:cowplot’:
align_plots
RColorBrewer::brewer.pal(9, "Set1")
RColorBrewer::brewer.pal(8, "Set2")
# final 16 clusters of CNCC
library(RColorBrewer)
myColors <- c(brewer.pal(9,"Set1")[c(1:5,7:9)], brewer.pal(8,"Set2")[1:3], brewer.pal(12,"Set3")[c(3:10)])[1:16]
getwd()
SRA.meta <- read.csv("../public/2021_Chen/SraRunTable.csv")
colnames(SRA.meta)
SRA.meta <- SRA.meta[,c("Run","dev_stage")]
SRA.meta
| Run | dev_stage |
|---|---|
| <chr> | <chr> |
| SRR10065158 | E10.5 |
| SRR10065151 | P7 |
| SRR10065152 | P1 |
| SRR10065153 | E17.5 |
| SRR10065154 | E14.5 |
| SRR10065155 | E13.5 |
| SRR10065156 | E12.5 |
| SRR10065157 | E11.5 |
raw.count.list <- list()
for (i in 1:nrow(SRA.meta)) {
print(i)
tmp.sra <- SRA.meta[i,"Run"]
tmp.stage <- SRA.meta[i,"dev_stage"]
tmp.report.path <- paste("../public/2021_Chen/done/", tmp.sra, "_report/outs/filtered_feature_bc_matrix", sep = "")
tmp.rawdata <- Read10X(tmp.report.path)
colnames(tmp.rawdata) <- paste(tmp.stage, colnames(tmp.rawdata), sep="_")
raw.count.list[[tmp.stage]] <- tmp.rawdata
}
[1] 1 [1] 2 [1] 3 [1] 4 [1] 5 [1] 6 [1] 7 [1] 8
# tmp.sra
# tmp.stage
dim(raw.count.list[[1]])
save(raw.count.list, file = "2021_Chen.raw.count.list.Rdata")
cellMeta <- data.frame()
for (i in names(raw.count.list)) {
print(i)
tmp.meta <- data.frame(stage=i, cellName=colnames(raw.count.list[[i]]))
cellMeta <- rbind(cellMeta, tmp.meta)
}
[1] "E10.5" [1] "P7" [1] "P1" [1] "E17.5" [1] "E14.5" [1] "E13.5" [1] "E12.5" [1] "E11.5"
exprMat <- cbind(raw.count.list[[1]], raw.count.list[[2]], raw.count.list[[3]], raw.count.list[[4]],
raw.count.list[[5]], raw.count.list[[6]], raw.count.list[[7]], raw.count.list[[8]])
save(exprMat, cellMeta, file = "dowloaded.exprMatrix.metaInfo.Rdata")
print(load("dowloaded.exprMatrix.metaInfo.Rdata"))
[1] "exprMat" "cellMeta"
cellMeta$lab <- "zhou"
# # removed datasets
# print(load("E13.5_CNCC_merged_updated.Rdata"))
print(load("all_CNCC_merged.Rdata"))
[1] "seuset" "all_tsne" "markers"
# vcl.raw.count <- seuset@assays$RNA@counts
vcl.raw.count <- seuset@raw.data
dim(vcl.raw.count)
vcl.cellMeta <- data.frame(stage="E13.5", cellName=colnames(vcl.raw.count), lab="Elly")
vcl.cellMeta[grep("ct1", vcl.cellMeta$cellName),]$stage <- "E14.0"
dim(exprMat)
# gene.anno.mm10.v3 <- read.csv("https://github.com/leezx/Toolsets/raw/master/data/gene.anno.mm10.3.0.0.csv", sep = ";", header = F)
# gene.anno.mm10.v3 <- gene.anno.mm10.v3[,c("V1","V3","V4","V7","V9","V11")]
# colnames(gene.anno.mm10.v3) <- c("chr", "start", "end", "gene_id", "gene_name", "gene_biotype")
# save(gene.anno.mm10.v3, file = "gene.anno.mm10.v3.rda")
# gene.anno.GRCh38.v3 <- read.csv("https://github.com/leezx/Toolsets/raw/master/data/gene.anno.GRCh38.3.0.0.csv", sep = ";", header = F)
# gene.anno.GRCh38.v3 <- gene.anno.GRCh38.v3[,c("V1","V3","V4","V7","V9","V11")]
# colnames(gene.anno.GRCh38.v3) <- c("chr", "start", "end", "gene_id", "gene_name", "gene_biotype")
# save(gene.anno.GRCh38.v3, file = "gene.anno.GRCh38.v3.rda")
print(load("gene.anno.mm10.v3.rda"))
print(load("gene.anno.GRCh38.v3.rda"))
[1] "gene.anno.mm10.v3" [1] "gene.anno.GRCh38.v3"
# go to iterbi package
vcl.raw.count.full <- add.missing.genes(vcl.raw.count, all.genes = unique(gene.anno.mm10.v3$gene_name))
exprMat.full <- add.missing.genes(exprMat, all.genes = unique(gene.anno.mm10.v3$gene_name))
dim(vcl.raw.count.full)
dim(exprMat.full)
raw.count.full <- cbind(vcl.raw.count.full, exprMat.full)
dim(raw.count.full)
cellMeta <- rbind(vcl.cellMeta, cellMeta)
cellMeta$split <- paste(cellMeta$lab, cellMeta$stage, sep = "_")
rownames(cellMeta) <- cellMeta$cellName
head(cellMeta)
| stage | cellName | lab | split | |
|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | |
| vcl_CCTTCGACACCAACCG | E13.5 | vcl_CCTTCGACACCAACCG | Elly | Elly_E13.5 |
| vcl_CTCTAATTCATAGCAC | E13.5 | vcl_CTCTAATTCATAGCAC | Elly | Elly_E13.5 |
| vcl_CAGAGAGCAGCGAACA | E13.5 | vcl_CAGAGAGCAGCGAACA | Elly | Elly_E13.5 |
| vcl_GCATGCGTCTGCCCTA | E13.5 | vcl_GCATGCGTCTGCCCTA | Elly | Elly_E13.5 |
| vcl_AATCCAGGTCGAACAG | E13.5 | vcl_AATCCAGGTCGAACAG | Elly | Elly_E13.5 |
| vcl_CATCAAGTCTGTCAAG | E13.5 | vcl_CATCAAGTCTGTCAAG | Elly | Elly_E13.5 |
integrated.cncc <- CreateSeuratObject(counts = raw.count.full, meta.data = cellMeta, project = "integrated.cncc",
min.cells = 3, min.features = 200)
integrated.cncc
An object of class Seurat 21064 features across 52784 samples within 1 assay Active assay: RNA (21064 features, 0 variable features)
library('Matrix')
integrated.cncc[["percent.mt"]] <- PercentageFeatureSet(integrated.cncc, pattern = "^mt-")
VlnPlot(integrated.cncc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
integrated.cncc <- subset(integrated.cncc, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 20 & nCount_RNA < 75000)
# split the dataset into a list of two seurat objects (stim and CTRL)
cncc.list <- SplitObject(integrated.cncc, split.by = "split")
# normalize and identify variable features for each dataset independently
cncc.list <- lapply(X = cncc.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# marker gene provided by EMBO paper
known.markers.df <- read.table("marker.genes.txt", header = T)
known.markers <- unique(unlist(known.markers.df))
length(known.markers)
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = cncc.list, nfeatures = 1000)
length(features)
features <- unique(c(features, known.markers))
length(features)
cncc.anchors <- FindIntegrationAnchors(object.list = cncc.list, anchor.features = features)
Scaling features for provided objects Finding all pairwise anchors Running CCA Merging objects Finding neighborhoods Finding anchors Found 7124 anchors Filtering anchors Retained 6643 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10503 anchors Filtering anchors Retained 4066 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6376 anchors Filtering anchors Retained 3093 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 12548 anchors Filtering anchors Retained 2554 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6619 anchors Filtering anchors Retained 2363 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 9239 anchors Filtering anchors Retained 1479 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 12881 anchors Filtering anchors Retained 2299 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6507 anchors Filtering anchors Retained 2163 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 9334 anchors Filtering anchors Retained 1865 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10139 anchors Filtering anchors Retained 5831 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11865 anchors Filtering anchors Retained 2821 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6197 anchors Filtering anchors Retained 2469 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 8896 anchors Filtering anchors Retained 1476 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 9138 anchors Filtering anchors Retained 4967 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10269 anchors Filtering anchors Retained 6821 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 14756 anchors Filtering anchors Retained 4774 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6872 anchors Filtering anchors Retained 3402 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 9926 anchors Filtering anchors Retained 2413 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11328 anchors Filtering anchors Retained 4986 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11400 anchors Filtering anchors Retained 6031 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10121 anchors Filtering anchors Retained 6220 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 15183 anchors Filtering anchors Retained 3572 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6833 anchors Filtering anchors Retained 2695 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10092 anchors Filtering anchors Retained 2516 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11885 anchors Filtering anchors Retained 3176 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11802 anchors Filtering anchors Retained 3638 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10902 anchors Filtering anchors Retained 3577 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 13543 anchors Filtering anchors Retained 9261 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 14474 anchors Filtering anchors Retained 6455 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 6708 anchors Filtering anchors Retained 3932 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 9863 anchors Filtering anchors Retained 4601 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 12746 anchors Filtering anchors Retained 2319 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 12933 anchors Filtering anchors Retained 2361 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 11880 anchors Filtering anchors Retained 2930 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 13660 anchors Filtering anchors Retained 6261 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 14657 anchors Filtering anchors Retained 8394 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 17545 anchors Filtering anchors Retained 4475 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 7682 anchors Filtering anchors Retained 2807 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 10396 anchors Filtering anchors Retained 5400 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 14121 anchors Filtering anchors Retained 2786 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 14142 anchors Filtering anchors Retained 2347 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 12851 anchors Filtering anchors Retained 2751 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 16132 anchors Filtering anchors Retained 5914 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 16842 anchors Filtering anchors Retained 8232 anchors Extracting within-dataset neighbors Running CCA Merging objects Finding neighborhoods Finding anchors Found 16567 anchors Filtering anchors Retained 12166 anchors Extracting within-dataset neighbors
# this command creates an 'integrated' data assay
cncc.combined <- IntegrateData(anchorset = cncc.anchors, features.to.integrate = features)
Merging dataset 2 into 1 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 9 into 10 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 3 into 10 9 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 6 into 5 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 8 into 7 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 4 into 5 6 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 7 8 into 10 9 3 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 1 2 into 10 9 3 7 8 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Merging dataset 5 6 4 into 10 9 3 7 8 1 2 Extracting anchors for merged samples Finding integration vectors Finding integration vector weights Integrating data Warning message: “Not all features provided are in this Assay object, removing the following feature(s): 11-Sep, 7-Sep, 4-Sep” Warning message: “Adding a command log without an assay associated with it”
# specify that we will perform downstream analysis on the corrected data note that the
# original unmodified data still resides in the 'RNA' assay
DefaultAssay(cncc.combined) <- "integrated"
# Run the standard workflow for visualization and clustering
cncc.combined <- ScaleData(cncc.combined, verbose = FALSE)
# Regress out cell cycle scores during data scaling【not sure】
cncc.combined <- ScaleData(cncc.combined, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(cncc.combined))
Regressing out S.Score, G2M.Score Centering and scaling data matrix
# cncc.combined <- RunPCA(cncc.combined, npcs = 30, verbose = FALSE, features = VariableFeatures(seuset))
cncc.combined <- RunPCA(cncc.combined, npcs = 30, verbose = F)
# cncc.combined <- RunTSNE(cncc.combined, reduction = "pca", dims = 1:30)
cncc.combined <- RunUMAP(cncc.combined, reduction = "pca", dims = 1:30)
18:22:20 UMAP embedding parameters a = 0.9922 b = 1.112 18:22:20 Read 51674 rows and found 30 numeric columns 18:22:20 Using Annoy for neighbor search, n_neighbors = 30 18:22:20 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 18:22:31 Writing NN index file to temp file /tmp/pbs.1145068.omics/RtmpEekIE7/file2e33c7d02cb55 18:22:31 Searching Annoy index using 1 thread, search_k = 3000 18:22:55 Annoy recall = 100% 18:22:56 Commencing smooth kNN distance calibration using 1 thread 18:22:59 Initializing from normalized Laplacian + noise 18:23:04 Commencing optimization for 200 epochs, with 2421742 positive edges 18:24:14 Optimization finished
cncc.combined <- FindNeighbors(cncc.combined, reduction = "pca", dims = 1:30)
cncc.combined <- FindClusters(cncc.combined, resolution = 0.7)
Computing nearest neighbor graph Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 51674 Number of edges: 2159422 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8849 Number of communities: 21 Elapsed time: 18 seconds
cncc.combined <- CellCycleScoring(cncc.combined, s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes, set.ident = F)
Warning message: “The following features are not present in the object: MCM5, PCNA, TYMS, FEN1, MCM2, MCM4, RRM1, UNG, GINS2, MCM6, CDCA7, DTL, PRIM1, UHRF1, MLF1IP, HELLS, RFC2, RPA2, NASP, RAD51AP1, GMNN, WDR76, SLBP, CCNE2, UBR7, POLD3, MSH2, ATAD2, RAD51, RRM2, CDC45, CDC6, EXO1, TIPIN, DSCC1, BLM, CASP8AP2, USP1, CLSPN, POLA1, CHAF1B, BRIP1, E2F8, not searching for symbol synonyms” Warning message: “The following features are not present in the object: HMGB2, CDK1, NUSAP1, UBE2C, BIRC5, TPX2, TOP2A, NDC80, CKS2, NUF2, CKS1B, MKI67, TMPO, CENPF, TACC3, FAM64A, SMC4, CCNB2, CKAP2L, CKAP2, AURKB, BUB1, KIF11, ANP32E, TUBB4B, GTSE1, KIF20B, HJURP, CDCA3, HN1, CDC20, TTK, CDC25C, KIF2C, RANGAP1, NCAPD2, DLGAP5, CDCA2, CDCA8, ECT2, KIF23, HMMR, AURKA, PSRC1, ANLN, LBR, CKAP5, CENPE, CTCF, NEK2, G2E3, GAS2L3, CBX5, CENPA, not searching for symbol synonyms” Warning message in AddModuleScore(object = object, features = features, name = name, : “Could not find enough features in the object from the following feature lists: S.Score Attempting to match case...Could not find enough features in the object from the following feature lists: G2M.Score Attempting to match case...”
table(cncc.combined$Phase)
G1 G2M S 25488 9656 16530
table(cncc.combined@active.ident)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 7789 4958 4423 4080 3861 3811 3797 3565 2694 2359 1974 1753 1600 1419 1157 1152 16 17 18 19 20 380 375 263 176 88
# cncc.combined$predict
DefaultAssay(cncc.combined)
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
"Spon1","Lmod1","Cav1",
"Actg2","Btg2","Sost",
"Dlk1","Tshz2","Igfbp5",
"Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
"Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
"Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
"Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
)
markers.to.plot[duplicated(markers.to.plot)]
cncc.combined$raw_cluster <- cncc.combined@active.ident
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.4) +RotatedAxis()
Warning message: “Removed 170 rows containing missing values (geom_point).”
markers.to.plot <- c("Myh11","Cxcl12","Pdgfra","Lum","Gfra3","Cnp","Th","Slc18a3","P2ry14","Vtn","Mlana","Dct")
tmp <- c("P2ry14","Vtn")
subset(gene.anno.mm10.v3, gene_name %in% tmp)
| chr | start | end | gene_id | gene_name | gene_biotype | |
|---|---|---|---|---|---|---|
| <chr> | <int> | <int> | <chr> | <chr> | <chr> | |
| 5526 | 3 | 59113855 | 59153618 | ENSMUSG00000036381 | P2ry14 | protein_coding |
| 21844 | 11 | 78499091 | 78502324 | ENSMUSG00000017344 | Vtn | protein_coding |
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, assay = "RNA", dot.scale = 6) +RotatedAxis()
# options(repr.plot.width=8, repr.plot.height=4)
# p1 <- DimPlot(cncc.combined, reduction = "umap", group.by = "split")
# p2 <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
# p1 + p2
# options(repr.plot.width=8, repr.plot.height=4)
# p1 <- DimPlot(cncc.combined, reduction = "umap", group.by = "split")
# p2 <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
# p1 + p2
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "Phase")
# cncc.combined$unsuper_cluster <- cncc.combined@active.ident
# cncc.combined$manual_anno <- plyr::mapvalues(cncc.combined@active.ident,
# from = 0:16, to = c("0c6","1c5","2c4","3c1","4c4","5c1","6c1","7c5", #0-7
# "8c7","9c1","10c2","11c3","12c8","13c5","14c2","15c9","16c10"))
# cncc.combined$manual_anno <- plyr::mapvalues(cncc.combined@active.ident,
# from = 0:16, to = c("c6","c5","c4","c1","c4","c1","c1","c5", #0-7
# "c7","c1","c2","c3","c8","c5","c2","c9","c10"))
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "manual_anno")
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "tsne", label = TRUE, repel = TRUE, group.by = "manual_anno")
# table(cncc.combined@active.ident, cncc.combined$annotation)
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", group.by = "ident", split.by = "split", ncol = 3)
# tmp <- rep("unknown", length.out = length(cncc.combined@active.ident))
# names(tmp) <- names(cncc.combined@active.ident)
# tmp[names(seuset@active.ident)] <- as.character(seuset@active.ident)
# tmp <- factor(tmp)
# head(tmp)
# cncc.combined$annotation <- tmp
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", group.by = "annotation", split.by = "split", ncol = 3)
# using known genes
save(cncc.combined, file = "supervised.cncc.combined.elly.zhou.Rdata")
# using known genes
# add all cells and E14.0
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.Rdata")
# using known genes
# add all cells and E14.0
# regression out cell cycle
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.removeCC.Rdata")
length(VariableFeatures(seuset))
length(VariableFeatures(integrated.cncc))
length(features)
cncc.query.anchors <- FindTransferAnchors(reference = seuset, query = integrated.cncc, dims = 1:30,
features = unique(c(VariableFeatures(seuset))))
Performing PCA on the provided reference using 1773 features as input. Projecting PCA Finding neighborhoods Finding anchors Found 12449 anchors Filtering anchors Retained 7059 anchors Extracting within-dataset neighbors
seuset$celltype <- seuset@active.ident
predictions <- TransferData(anchorset = cncc.query.anchors, refdata = seuset$celltype,dims = 1:30)
Finding integration vectors Finding integration vector weights Predicting cell labels
integrated.cncc <- AddMetaData(integrated.cncc, metadata = predictions)
tmp <- rep("unknown", length.out = length(integrated.cncc@active.ident))
names(tmp) <- names(integrated.cncc@active.ident)
tmp[names(seuset@active.ident)] <- as.character(seuset@active.ident)
tmp <- factor(tmp)
head(tmp)
integrated.cncc$annotation <- tmp
table(integrated.cncc$predicted.id, integrated.cncc$annotation)
c1 c2 c3 c4 c5 c6 unknown
c1 792 2 0 40 43 28 11977
c2 2 424 3 8 0 1 2300
c3 2 16 484 15 1 0 302
c4 21 8 5 2544 39 2 4649
c5 9 5 2 88 1549 111 5215
c6 1 0 0 3 2 143 18814
# save
tmp <- integrated.cncc$predicted.id[colnames(cncc.combined)]
cncc.combined$predict <- integrated.cncc$predicted.id[colnames(cncc.combined)]
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
# using known genes
save(cncc.combined, file = "supervised.cncc.combined.elly.zhou.mapped.Rdata")
# add E14.0
# using known genes
save(cncc.combined, file = "all.supervised.cncc.combined.elly.zhou.mapped.Rdata")
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "predict") +
geom_vline(xintercept = -7) +
geom_hline(yintercept = 5.4)
# p
all.umap <- p$data
all.umap <- subset(all.umap, UMAP_1> -7 & UMAP_2 < 5.4)
plot(all.umap$UMAP_1, all.umap$UMAP_2)
all.umap <- subset(all.umap, !predict %in% c("c2","c3"))
head(all.umap)
| UMAP_1 | UMAP_2 | ident | predict | |
|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | |
| vcl_CCTTCGACACCAACCG | -2.612578 | -5.0383182 | 0 | c5 |
| vcl_CTCTAATTCATAGCAC | -2.160392 | -5.5188604 | 0 | c4 |
| vcl_AATCCAGGTCGAACAG | 3.421771 | 0.8463258 | 3 | c5 |
| vcl_CATCAAGTCTGTCAAG | -4.815358 | -2.0471276 | 6 | c4 |
| vcl_CAGAGAGCAGCTGGCT | 4.433164 | 0.1995236 | 3 | c5 |
| vcl_ACGGGTCTCAACACGT | -2.521075 | -4.3485137 | 0 | c4 |
save(all.umap, file = "all.umap.Rdata")
ggsave(filename = "integrated.CNCC.8.stages.pdf", width = 8, height = 7)
options(repr.plot.width=8, repr.plot.height=7)
DimPlot(cncc.combined, reduction = "tsne", label = TRUE, repel = TRUE, group.by = "predict")
options(repr.plot.width=8, repr.plot.height=8)
VlnPlot(cncc.combined, c("Ube2c", "Cdc20", "Sox10", "Fabp7", "Phox2a", "Npy", "Col2a1","Cxcl12","Pdgfra","Lum","Acta2","Tbx20"), pt.size = 0,
group.by = "predict")
ggsave(filename = "integrated.CNCC.celltype.marker.pdf", width = 8, height = 8)
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
ggsave(filename = "integrated.CNCC.split.stages.pdf", width = 9, height = 9)
options(repr.plot.width=7, repr.plot.height=9)
FeaturePlot(cncc.combined, features = c("Lum", "Pdgfra", "Myh11", "Cxcl12", "Des", "Dct"), min.cutoff = "q8", pt.size = 0.1)
ggsave(filename = "integrated.CNCC.key.markers.pdf", width = 7, height = 9)
# DotPlot(cncc.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "stim") +
# RotatedAxis()
save(cncc.combined, file = "cncc.combined.elly.zhou.Rdata")
# print(load("keyRdata/supervised.cncc.combined.elly.zhou.mapped.Rdata"))
print(load("keyRdata/all.supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined"
dim(cncc.combined)
table(cncc.combined$split)
Elly_E13.5 Elly_E14.0 zhou_E10.5 zhou_E11.5 zhou_E12.5 zhou_E13.5 zhou_E14.5
6564 1991 2806 7763 6178 6014 6682
zhou_E17.5 zhou_P1 zhou_P7
4341 4560 4775
6564+1991
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"
source("https://raw.githubusercontent.com/satijalab/seurat/master/R/utilities.R")
PercentageFeatureSet <- function(
object,
pattern = NULL,
features = NULL,
col.name = NULL,
assay = NULL
) {
library('Matrix')
assay <- assay %||% DefaultAssay(object = object)
if (!is.null(x = features) && !is.null(x = pattern)) {
warning("Both pattern and features provided. Pattern is being ignored.")
}
# features <- features %||% grep(pattern = pattern, x = rownames(x = object[[assay]]), value = TRUE)
features <- features %||% grep(pattern = pattern, x = rownames(x = object@assays$RNA@counts), value = TRUE)
# print(head(features))
#percent.featureset <- Matrix::colSums(x = GetAssayData(object = object, slot = "counts"))[features, ]/object[[paste0("nCount_", assay)]] * 100
percent.featureset <- Matrix::colSums(x =object@assays$RNA@counts[features, ])/object[[paste0("nCount_", assay)]] * 100
if (!is.null(x = col.name)) {
object <- AddMetaData(object = object, metadata = percent.featureset, col.name = col.name)
return(object)
}
return(percent.featureset)
}
library('Matrix')
cncc.combined[["percent.mt"]] <- PercentageFeatureSet(cncc.combined, pattern = "mt-", assay = 'RNA')
tmp.group <- cncc.combined$split
tmp.group[grep("ct1", names(tmp.group))] <- "E14.0 Control"
tmp.group[grep("ct2", names(tmp.group))] <- "E13.5 Control"
tmp.group[grep("vcl", names(tmp.group))] <- "E13.5 Vcl cKO"
tmp.group <- stringr::str_replace(tmp.group, pattern = "zhou_", replacement = "Public ")
cncc.combined$split2 <- tmp.group
table(cncc.combined$split2)
1623+4941
E13.5 Control E13.5 Vcl cKO E14.0 Control Public E10.5 Public E11.5
1623 4941 1991 2806 7763
Public E12.5 Public E13.5 Public E14.5 Public E17.5 Public P1
6178 6014 6682 4341 4560
Public P7
4775
options(repr.plot.width=12, repr.plot.height=6)
VlnPlot(cncc.combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0, group.by = "split2")
# + geom_boxplot(width=0.1, outlier.size = 0)
ggsave(filename = "Figures/QC.pdf", width = 12, height = 6)
tmp.group <- cncc.combined.sub$split
tmp.group[grep("ct1", names(tmp.group))] <- "E14.0 Control"
tmp.group[grep("ct2", names(tmp.group))] <- "E13.5 Control"
tmp.group[grep("vcl", names(tmp.group))] <- "E13.5 Vcl cKO"
tmp.group <- stringr::str_replace(tmp.group, pattern = "zhou", replacement = "Chen_et_al")
cncc.combined.sub$split2 <- tmp.group
options(repr.plot.width=10, repr.plot.height=10)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = F, repel = F, label.size = 5, cols = myColors,
split.by = "split2", ncol = 3)
p1
ggsave(filename = "Figures/public_integration.pdf", width = 10, height = 10)
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", group.by = "predict", split.by = "split", ncol = 3)
table(cncc.combined@active.ident)
4 3 13 1 8 10 12 2 5 6 7 14 11 17 19 16 3861 4080 1419 4958 2694 1974 1600 4423 3811 3797 3565 1157 1753 375 176 380 18 20 9 15 263 88 2359 1152
cncc.combined$raw_cluster[is.na(cncc.combined$raw_cluster)] <- 0
cncc.combined$raw_cluster <- factor(cncc.combined$raw_cluster,
levels = as.character(c(4,3,1,13,
8,6,0,10,12,2,5,7,
14,11,
17,
19,16,18,20,
9,15)))
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "raw_cluster")
options(repr.plot.width=14, repr.plot.height=8)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3,
group.by = "raw_cluster") + RotatedAxis()
Warning message: “Removed 81 rows containing missing values (geom_point).”
cncc.combined$ordered_raw_cluster <- plyr::mapvalues(cncc.combined$raw_cluster,
from = c(4,3,1,13,
8,6,0,10,12,2,5,7,
14,11,
17,
19,16,18,20,
9,15),
to = 1:21)
cncc.combined@active.ident <- cncc.combined$ordered_raw_cluster
cncc.combined@active.ident <- factor(cncc.combined@active.ident, levels = 1:21)
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
# options(repr.plot.width=8, repr.plot.height=8)
# DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "Phase")
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
"Spon1","Lmod1","Cav1",
"Actg2","Btg2","Sost",
"Dlk1","Tshz2","Igfbp5",
"Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
"Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
"Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
"Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
)
markers.to.plot <- c(# "Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
"Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
"Acta2","Cxcl12","Rgs5","Myh11", # VSMC
"Phox2a","Gap43","Tubb3","Th", # Neuron
"Fabp7","Plp1","Ednrb","Sox10", # Glia
"Dct","Pmel","Gpnmb", # MLA
"Des","Cnn1"#,
#"Col2a1",
#"Crabp1","Crabp2"
)
options(repr.plot.width=12, repr.plot.height=7)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) +
RotatedAxis()
Warning message: “Removed 81 rows containing missing values (geom_point).”
table(cncc.combined$stage)
E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5 P1 P7 2806 7763 6178 12578 1991 6682 4341 4560 4775
cncc.combined.sub <- subset(cncc.combined, subset = ordered_raw_cluster %in% c(1:15,17:18))
cncc.combined.sub$ordered_raw_cluster <- plyr::mapvalues(cncc.combined.sub@active.ident,
from = c(1:15,17:18),
to = 1:17)
cncc.combined.sub@active.ident <- cncc.combined.sub$ordered_raw_cluster
cncc.combined.sub@active.ident <- factor(cncc.combined.sub@active.ident, levels = 1:17)
# options(repr.plot.width=14, repr.plot.height=8)
# DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE)
options(repr.plot.width=7, repr.plot.height=9)
FeaturePlot(cncc.combined.sub, features = c("Lum", "Pdgfra", "Myh11", "Cxcl12", "Des", "Dct"), min.cutoff = "q8", pt.size = 0.1)
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
"Pdgfra","Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn","Tcf21","Penk","Sfrp2", # MES
"Acta2","Cxcl12","Rgs5","Myh11","Eln","Fbln2", # VSMC
"Phox2a","Gap43","Tubb3","Th", # Neuron
"Fabp7","Plp1","Ednrb","Sox10", # Glia
"Dct","Pmel","Gpnmb","Pax3","Kit", # MLA
"Des","Cnn1",
#"Col2a1",
"Crabp1","Crabp2"
)
options(repr.plot.width=12, repr.plot.height=6)
DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
Warning message: “Removed 54 rows containing missing values (geom_point).”
cncc.combined.sub$ordered_raw_cluster_final <- cncc.combined.sub@active.ident
cncc.combined.sub$EMBO_anno <- plyr::mapvalues(cncc.combined.sub$ordered_raw_cluster_final,
from = 1:17,
to = c("MES-1","MES-2","MES-3","MES-4",
"VSMC-1","VSMC-2","VSMC-3","VSMC-4","VSMC-5","VSMC-6","VSMC-7","VSMC-8",
"Neuron","SWN","MLA","Mural","Mural"))
cncc.combined.sub@active.ident <- cncc.combined.sub$EMBO_anno
options(repr.plot.width=12, repr.plot.height=6)
DotPlot_order(cncc.combined.sub, features = markers.to.plot, dot.scale = 6, col.min=-1, col.max=1, dot.min=0.3) + RotatedAxis()
Warning message: “Removed 45 rows containing missing values (geom_point).”
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"
table(cncc.combined.sub@active.ident)
MES-1 MES-2 MES-3 MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7 3861 4080 4958 1419 2694 3797 7789 1974 1600 4423 3811 VSMC-8 Neuron SWN MLA Mural 3565 1157 1753 375 643
table(cncc.combined.sub$stage)
E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5 P1 P7 2268 6971 5922 12341 1957 6092 3777 4204 4367
options(repr.plot.width=8, repr.plot.height=6)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, label.size = 5, cols = myColors)
p1
options(repr.plot.width=8, repr.plot.height=6)
p1 <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, label.size = 5, cols = myColors)
p1
ggsave(filename = "Figures/integrated.UMAP.pdf", width = 8, height = 6)
options(repr.plot.width=14, repr.plot.height=5)
FeaturePlot(cncc.combined.sub, ncol = 5, min.cutoff = "q8", pt.size = 0.2,
features = c("Lum","Pdgfra","Cxcl12","Myh11","Phox2a","Th","Sox10","Fabp7","Des","Dct"))
ggsave(filename = "Figures/integrated.marker.staining.pdf", width = 14, height = 5)
options(repr.plot.width=9, repr.plot.height=5)
FeaturePlot(cncc.combined.sub, ncol = 3, min.cutoff = "q8", pt.size = 0.2, slot = "data",
features = c('Acta2','Tagln','Eln','Fbln5','Mgp','Mylpf'))
Warning message: “Could not find Mylpf in the default search locations, found in RNA assay instead”
ggsave(filename = "Figures/integrated.marker.staining2.pdf", width = 9, height = 5)
getwd()
cncc.markers <- FindAllMarkers(cncc.combined.sub, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster MES-1 Calculating cluster MES-2 Calculating cluster MES-3 Calculating cluster MES-4 Calculating cluster VSMC-1 Calculating cluster VSMC-2 Calculating cluster VSMC-3 Calculating cluster VSMC-4 Calculating cluster VSMC-5 Calculating cluster VSMC-6 Calculating cluster VSMC-7 Calculating cluster VSMC-8 Calculating cluster Neuron Calculating cluster SWN Calculating cluster MLA Calculating cluster Mural
print(load("keyRdata/all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"
length(unique(cncc.markers$cluster))
table(cncc.markers$cluster)
MES-1 MES-2 MES-3 MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7
168 74 186 123 80 160 61 65 35 46 64
VSMC-8 Neuron SWN MLA Mural
50 189 181 244 196
cncc.markers$`pct.1` <- NULL
cncc.markers$`pct.2` <- NULL
cncc.markers$gene <- NULL
write.csv(cncc.markers, file = "CNCC.16.cluster.csv")
save(cncc.combined.sub, cncc.markers, file = "keyRdata/all.cncc.combined.EMBO.mapped.Rdata")
head(cncc.markers)
| p_val | avg_logFC | p_val_adj | cluster | |
|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <fct> | |
| Tagln2 | 0 | 1.0312672 | 0 | MES-1 |
| Pdlim3 | 0 | 0.9965814 | 0 | MES-1 |
| Cdk1 | 0 | 0.9807971 | 0 | MES-1 |
| Ube2c | 0 | 0.9607107 | 0 | MES-1 |
| Hapln1 | 0 | 0.9216673 | 0 | MES-1 |
| Hist1h2ap | 0 | 0.8916896 | 0 | MES-1 |
cncc.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_logFC) -> top10
head(top10)
| p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <fct> | <chr> |
| 0 | 1.0312672 | 1.000 | 0.967 | 0 | MES-1 | Tagln2 |
| 0 | 0.9965814 | 1.000 | 0.967 | 0 | MES-1 | Pdlim3 |
| 0 | 0.9807971 | 0.996 | 0.913 | 0 | MES-1 | Cdk1 |
| 0 | 0.9607107 | 0.974 | 0.942 | 0 | MES-1 | Ube2c |
| 0 | 0.9216673 | 0.941 | 0.823 | 0 | MES-1 | Hapln1 |
| 0 | 0.8916896 | 0.979 | 0.914 | 0 | MES-1 | Hist1h2ap |
table(cncc.combined.sub@active.ident)
MES-1 MES-2 MES-3 MES-4 VSMC-1 VSMC-2 VSMC-3 VSMC-4 VSMC-5 VSMC-6 VSMC-7 3861 4080 4958 1419 2694 3797 7789 1974 1600 4423 3811 VSMC-8 Neuron SWN MLA Mural 3565 1157 1753 375 643
options(repr.plot.width=20, repr.plot.height=20)
p <- DoHeatmap(cncc.combined.sub, features = top10$gene, combine = T, angle = 90) + NoLegend()
pdf("Figures/16.cluster.heatmap.pdf", width=15, height=15)
p
dev.off()
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC, cell cycle
"Pdgfra","Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn","Tcf21","Penk","Sfrp2", # MES
"Acta2","Cxcl12","Rgs5","Myh11","Eln","Fbln2", # VSMC
"Phox2a","Gap43","Tubb3","Th", # Neuron
"Fabp7","Plp1","Ednrb","Sox10", # Glia
"Dct","Pmel","Gpnmb","Pax3","Kit", # MLA
"Des","Cnn1", # Mural
#"Col2a1",
"Crabp1","Crabp2"
)
features <- c("Myh11","Cxcl12","Pdgfra","Lum","Phox2a","Th","Slc18a3","Sox10","Fabp7","Mlana","Dct","Des","Cnn1")
options(repr.plot.width=8, repr.plot.height=10)
StackedVlnPlot(obj = cncc.combined.sub, features = features, cols = myColors)
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
ggsave(filename = "Figures/integrated.marker.violin.pdf", width = 8, height = 10)
# marker gene provided by EMBO paper
known.markers.df <- read.table("marker.genes.txt", header = T)
for (i in colnames(known.markers.df)) {
tmp.module <- known.markers.df[,i]
tmp.module <- tmp.module[tmp.module %in% rownames(cncc.combined.sub@assays$integrated@scale.data)]
cncc.combined[[i]] <- colMeans(cncc.combined.sub@assays$integrated@scale.data[tmp.module,])
}
options(repr.plot.width=12, repr.plot.height=20)
VlnPlot(cncc.combined.sub, features = colnames(known.markers.df), ncol = 3, pt.size = 0)
table(cncc.combined$raw_cluster, cncc.combined$Phase)
G1 G2M S
0 3363 25 2230
1 4509 0 156
2 4325 2 269
3 3615 11 740
4 27 2045 2073
5 1 1651 2354
6 673 447 2049
7 8 1729 1430
8 2453 1 321
9 1529 477 450
10 728 987 114
11 106 603 954
12 1385 53 158
13 644 636 66
14 1054 0 73
15 295 360 444
16 505 77 225
17 59 276 66
18 11 110 237
19 5 144 60
20 43 22 103
21 22 16 46
cncc.combined$EMBO <- plyr::mapvalues(cncc.combined$raw_cluster,
from = c(4,5,7,
13,3,1,
6,10,0, 2,8,14,12,
11,16,18,
17,19,
9,15,20,21),
to = c("CNCC-1","CNCC-2","CNCC-3",
"MES-1","MES-2","MES-3",
"VSMC-1","VSMC-2","VSMC-3", "VSMC-4","VSMC-5","VSMC-6","VSMC-7",
"SWN","Neuron","MLA",
"Mural","Mural",
"Unknown","Unknown","Unknown","Unknown"))
cncc.combined$EMBO <- factor(cncc.combined$EMBO, levels = c("CNCC-1","CNCC-2","CNCC-3",
"MES-1","MES-2","MES-3",
"VSMC-1","VSMC-2","VSMC-3", "VSMC-4","VSMC-5","VSMC-6","VSMC-7",
"SWN","Neuron","MLA",
"Mural","Unknown"))
cncc.combined$EMBO2 <- plyr::mapvalues(cncc.combined$raw_cluster,
from = c(4,5,7,
13,3,1,
6,10,0, 2,8,14,12,
11,16,18,
17,19,9,15,20,21),
to = c("CNCC","CNCC","CNCC",
"MES","MES","MES",
"VSMC","VSMC","VSMC", "VSMC","VSMC","VSMC","VSMC",
"SWN","Neuron","MLA",
"Mural","Mural",
"Unknown","Unknown","Unknown","Unknown"))
cncc.combined$EMBO2 <- factor(cncc.combined$EMBO2, levels = c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
# cncc.combined@active.ident <- cncc.combined$EMBO
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO2")
cncc.combined$split <- factor(cncc.combined$split, levels = c("zhou_E10.5","zhou_E11.5","zhou_E12.5","zhou_E13.5","Elly_E13.5",
"zhou_E14.5","zhou_E17.5","zhou_P1","zhou_P7"))
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(cncc.combined, reduction = "umap", group.by = "EMBO2", split.by = "split", ncol = 3)
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO")
cncc.combined@active.ident <- cncc.combined$EMBO2
markers.to.plot <- c("Tbx20","Thbs1","Meox1",
"Spon1","Lmod1","Cav1",
"Actg2","Btg2","Sost",
"Dlk1","Tshz2","Igfbp5",
"Foxf1","Pclaf","Crabp2","Igf1","Ramp2","Lum","Cenpa","Ccnb2","Birc5","Hist1h2ap","Top2a",
"Shisa2","Tcf21","Ube2c","Cenpf","Fabp7","Plp1","Ednrb",
"Tmem158","Gxylt2","Osr1","Actc1","Phox2a","Gap43","Tubb3","Cdk1",
"Ndufa4l2","Sparcl1","Cox4i2","Dct","Pmel","Gpnmb"
)
table(cncc.combined@active.ident)
VSMC MES CNCC Unknown SWN Neuron Mural MLA 20710 10377 11318 3807 1663 807 610 358
cncc.combined@active.ident <- factor(cncc.combined@active.ident,
levels = rev(c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown")))
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC
"Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
"Acta2","Cxcl12","Rgs5","Myh11", # VSMC
"Phox2a","Gap43","Tubb3","Th", # Neuron
"Fabp7","Plp1","Ednrb","Sox10", # Glia
"Dct","Pmel","Gpnmb", # MLA
"Des","Cnn1",
"Col2a1"
)
options(repr.plot.width=11, repr.plot.height=5)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message: “Removed 39 rows containing missing values (geom_point).”
cncc.combined@active.ident <- factor(cncc.combined@active.ident,
levels = c("CNCC","MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE)
dim(cncc.combined@assays$RNA@counts)
cncc.markers <- FindAllMarkers(cncc.combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster CNCC Calculating cluster MES Calculating cluster VSMC Calculating cluster Neuron Calculating cluster SWN Calculating cluster MLA Calculating cluster Mural Calculating cluster Unknown
library(dplyr)
# install.packages("dplyr")
packageVersion("dplyr")
[1] ‘0.8.5’
cncc.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_logFC) -> top10
pdf("Figures/lineage.markers.heatmap.new.clusters.pdf", width=12, height=10)
DoHeatmap(cncc.combined, features = top10$gene) + NoLegend()
dev.off()
# using known genes
save(cncc.combined, cncc.markers, file = "supervised.cncc.combined.elly.zhou.mapped.Rdata")
getwd()
print(load("supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined" "cncc.markers"
markers.to.plot <- c("Top2a","Cdk1","Ube2c","Cdc20","Ccnb1", # CNCC
"Tbx20","Thbs1","Meox1","Igf1","Ramp2","Sox9","Lum","Dcn", # MES
"Acta2","Cxcl12","Rgs5","Myh11", # VSMC
"Phox2a","Gap43","Tubb3","Th", # Neuron
"Fabp7","Plp1","Ednrb","Sox10", # Glia
"Dct","Pmel","Gpnmb", # MLA
"Des","Cnn1",
"Col2a1"
)
options(repr.plot.width=11, repr.plot.height=5)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message: “Removed 39 rows containing missing values (geom_point).”
cncc.combined@active.ident <- cncc.combined$EMBO
options(repr.plot.width=11, repr.plot.height=6)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message: “Removed 100 rows containing missing values (geom_point).”
cncc.combined$EMBO3 <- plyr::mapvalues(cncc.combined$raw_cluster,
from = c(4,5,7,
13,3,1,
6,10,0, 2,8,14,12,
11,16,18,
17,19,
9,15,20,21),
to = c("MES-1","VSMC-2","VSMC-1",
"MES-2","MES-3","MES-4",
"VSMC-3","VSMC-4","VSMC-5", "VSMC-6","VSMC-7","VSMC-8","VSMC-9",
"SWN","Neuron","MLA",
"Mural","Mural",
"Unknown","Unknown","Unknown","Unknown"))
cncc.combined$EMBO3 <- factor(cncc.combined$EMBO3, levels = c("MES-1","MES-2","MES-3","MES-4",
"VSMC-1","VSMC-2","VSMC-3","VSMC-4", "VSMC-5","VSMC-6","VSMC-7","VSMC-8","VSMC-9",
"SWN","Neuron","MLA",
"Mural","Unknown"))
cncc.combined@active.ident <- cncc.combined$EMBO3
options(repr.plot.width=11, repr.plot.height=6)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message: “Removed 100 rows containing missing values (geom_point).”
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO3")
cncc.combined$EMBO4 <- plyr::mapvalues(cncc.combined$raw_cluster,
from = c(4,5,7,
13,3,1,
6,10,0, 2,8,14,12,
11,16,18,
17,19,
9,15,20,21),
to = c("MES","VSMC","VSMC",
"MES","MES","MES",
"VSMC","VSMC","VSMC", "VSMC","VSMC","VSMC","VSMC",
"SWN","Neuron","MLA",
"Mural","Mural",
"Unknown","Unknown","Unknown","Unknown"))
cncc.combined$EMBO4 <- factor(cncc.combined$EMBO4, levels = c("MES","VSMC","Neuron","SWN","MLA","Mural","Unknown"))
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "EMBO4")
cncc.combined@active.ident <- cncc.combined$EMBO4
options(repr.plot.width=11, repr.plot.height=4)
DotPlot_order(cncc.combined, features = markers.to.plot, dot.scale = 8, col.min=-1, col.max=2, dot.min=0.4) + RotatedAxis()
Warning message: “Removed 34 rows containing missing values (geom_point).”
print(load("all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"
# API
groups <- cncc.combined.sub@active.ident
exprM <- cncc.combined.sub@assays$integrated@scale.data[,names(groups)]
names(table(groups))
merged.exprM <- data.frame()
# samples <- c("IMR_ENCC", "UE_ENCC", "HSCR_5c3","HSCR_20c7","HSCR_10c2","HSCR_1c11","HSCR_17c8","HSCR_23c9")
samples <- unique(groups)
for (i in samples) {
print(i)
tmp.exprM <- exprM[,groups==i]
tmp.exprM.merge <- rowMeans(tmp.exprM)
merged.exprM <- rbind(merged.exprM, tmp.exprM.merge)
# break
}
#
merged.exprM <- as.data.frame(t(merged.exprM))
colnames(merged.exprM) <- samples
rownames(merged.exprM) <- names(tmp.exprM.merge)
[1] "VSMC-7" [1] "SWN" [1] "Neuron" [1] "MES-2" [1] "VSMC-8" [1] "VSMC-6" [1] "MES-3" [1] "VSMC-3" [1] "MES-4" [1] "VSMC-2" [1] "VSMC-1" [1] "VSMC-4" [1] "VSMC-5" [1] "Mural" [1] "MES-1" [1] "MLA"
# devtools::install_version("phangorn", version = "2.5.5", repos = "http://cran.us.r-project.org")
# # install.packages('devtools')
# devtools::install_github("jingwyang/TreeExp", dependencies = F)
library(TreeExp)
disM <- dist.euc(merged.exprM[,])
dim(disM)
Attaching package: ‘TreeExp’
The following object is masked from ‘package:base’:
print
rownames(disM) <- colnames(merged.exprM)
colnames(disM) <- colnames(merged.exprM)
# # cluster order
# disM <- disM[,as.character(1:17)]
disM[is.na(disM)] <- 0
tr <- NJ(disM)
tr$tip.label
tr <- ape::root(tr, outgroup = "MLA")
library("ggtree")
Registered S3 method overwritten by 'treeio': method from root.phylo ape ggtree v2.0.4 For help: https://yulab-smu.github.io/treedata-book/ If you use ggtree in published research, please cite the most appropriate paper(s): - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194 - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
myColors[1:12]
# names(myColors) <- as.character(1:17)
tr$tip.label <- factor(tr$tip.label, levels = c('MES-1','MES-2','MES-3','MES-4','VSMC-1','VSMC-2','VSMC-3',
'VSMC-4','VSMC-5','VSMC-6','VSMC-7','VSMC-8','Neuron','SWN','MLA','Mural'))
# tr$tip.label <- factor(1:17, levels = 1:17)
myColors <- myColors[as.numeric(tr$tip.label)]
tr$tip.label <- as.character(tr$tip.label)
options(repr.plot.width=8, repr.plot.height=7)
display.brewer.all()
# all gennes
options(repr.plot.width=12, repr.plot.height=3)
ggtree(tr, branch.length="none", linetype = "solid", size = 1) +
# theme_tree2() +
# vjust: + -> down; hjust: + -> left
geom_tiplab(size=6, color=myColors, align=F, hjust = 0.5, vjust = 2, angle = 30, fontface = "bold") + # normal hjust = 0.5, vjust = 1.5
#scale_y_continuous(limits = c(0, 5)) +
coord_flip() +
scale_x_reverse(expand = c(0,3)) +
labs(x = "", y = "", title = "Lineage Cladogram") +
theme(plot.title = element_text(hjust = 0.5, vjust = -7, face = "bold", size=20))
Warning message: “`data_frame()` is deprecated as of tibble 1.1.0. Please use `tibble()` instead. This warning is displayed once every 8 hours. Call `lifecycle::last_warnings()` to see where this warning was generated.”
ggsave(filename = "Figures/integrated.Lineage.Cladogram.pdf", width = 12, height = 3)
# all gennes
options(repr.plot.width=9, repr.plot.height=9)
ggtree(tr, linetype = "solid", size = 1, layout="daylight", branch.length = 'none') + #
# theme_tree2() +
# vjust: + -> down; hjust: + -> left
geom_tiplab(size=6, color=myColors, align=F, angle = 0, vjust = 0, fontface = "bold") +
#scale_y_continuous(limits = c(0, 5)) +
coord_flip() +
scale_x_reverse(expand = c(0,3)) +
labs(x = "", y = "", title = "Lineage Cladogram") +
theme(plot.title = element_text(hjust = 0.5, vjust = -4, face = "bold", size=20))
Average angle change [1] 0.112445406660657 Average angle change [2] 0.0357877565782561
print(load("all.supervised.cncc.combined.elly.zhou.mapped.Rdata"))
[1] "cncc.combined"
table(cncc.combined$split)
Elly_E13.5 Elly_E14.0 zhou_E10.5 zhou_E11.5 zhou_E12.5 zhou_E13.5 zhou_E14.5
6564 1991 2806 7763 6178 6014 6682
zhou_E17.5 zhou_P1 zhou_P7
4341 4560 4775
print(load("all.cncc.combined.EMBO.mapped.Rdata"))
[1] "cncc.combined.sub" "cncc.markers"
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined.sub, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "split") +
geom_vline(xintercept = -7) +
geom_hline(yintercept = 5.4)
# p
all.umap <- p$data
Warning message:
“Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
Please use `as_label()` or `as_name()` instead.
This warning is displayed once per session.”
head(all.umap)
| UMAP_1 | UMAP_2 | ident | split | |
|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | |
| vcl_CCTTCGACACCAACCG | -4.80720965 | -1.920804 | VSMC-7 | Elly_E13.5 |
| vcl_CTCTAATTCATAGCAC | -5.10676066 | -2.119910 | VSMC-7 | Elly_E13.5 |
| vcl_CAGAGAGCAGCGAACA | 4.45797499 | -11.141129 | SWN | Elly_E13.5 |
| vcl_GCATGCGTCTGCCCTA | -0.03043394 | -8.702329 | Neuron | Elly_E13.5 |
| vcl_AATCCAGGTCGAACAG | 0.95436476 | 5.041812 | MES-2 | Elly_E13.5 |
| vcl_CATCAAGTCTGTCAAG | -0.60639481 | -3.524776 | VSMC-8 | Elly_E13.5 |
# all.umap <- subset(all.umap, UMAP_1> -7 & UMAP_2 < 5.4)
all.umap <- subset(all.umap, UMAP_2 > -6)
all.umap <- subset(all.umap, !(UMAP_1>2 & UMAP_2<0))
ggplot(all.umap, aes(x=UMAP_1, y=UMAP_2)) +
geom_point() +
geom_vline(xintercept = 2) +
geom_hline(yintercept = 0) +
geom_hline(yintercept = -6)
# plot(all.umap$UMAP_1, all.umap$UMAP_2)
# all.umap <- subset(all.umap, ident %in% 1:12)
# all.umap <- subset(all.umap, !predict %in% c("c2","c3"))
head(all.umap)
| UMAP_1 | UMAP_2 | ident | split | |
|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | |
| vcl_CCTTCGACACCAACCG | -4.8072097 | -1.920804 | VSMC-7 | Elly_E13.5 |
| vcl_CTCTAATTCATAGCAC | -5.1067607 | -2.119910 | VSMC-7 | Elly_E13.5 |
| vcl_AATCCAGGTCGAACAG | 0.9543648 | 5.041812 | MES-2 | Elly_E13.5 |
| vcl_CATCAAGTCTGTCAAG | -0.6063948 | -3.524776 | VSMC-8 | Elly_E13.5 |
| vcl_CAGAGAGCAGCTGGCT | -2.3134097 | -1.675376 | VSMC-6 | Elly_E13.5 |
| vcl_ACGGGTCTCAACACGT | -3.7033431 | -3.059830 | VSMC-6 | Elly_E13.5 |
save(all.umap, file = "all.umap.rmCC.Rdata")
all.umap$cluster <- all.umap$ident
all.umap <- all.umap[grep("VSMC|MES", all.umap$cluster),]
all.umap.elly <- all.umap[grep("ct2|vcl", rownames(all.umap)),]
all.umap.elly$rawName <- rownames(all.umap.elly)
tmp.count <- as.matrix(cncc.combined.sub@assays$RNA@counts[,all.umap.elly$rawName])
write.table(t(tmp.count), quote = F, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/data_prep/vcl/E13.5_vcl.tsv", sep="\t")
write.table(rownames(tmp.count), quote = F, row.names = F, col.names = F, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/data_prep/vcl/genes.txt", sep="\t")
dim(tmp.count)
tmp.df <- read.csv("/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/Elly/color_data_gene_sets.csv", header = F, row.names = 1)
Warning message in read.table(file = file, header = header, sep = sep, quote = quote, : “incomplete final line found by readTableHeader on '/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/Elly/color_data_gene_sets.csv'”
dim(tmp.df)
all.umap.elly$group <- "Vcl cKO"
all.umap.elly[grep("ct", all.umap.elly$rawName),]$group <- "Control"
head(all.umap.elly)
| UMAP_1 | UMAP_2 | ident | split | cluster | rawName | group | |
|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | <fct> | <chr> | <chr> | |
| vcl_CCTTCGACACCAACCG | -4.8072097 | -1.920804 | VSMC-7 | Elly_E13.5 | VSMC-7 | vcl_CCTTCGACACCAACCG | VclcKO |
| vcl_CTCTAATTCATAGCAC | -5.1067607 | -2.119910 | VSMC-7 | Elly_E13.5 | VSMC-7 | vcl_CTCTAATTCATAGCAC | VclcKO |
| vcl_AATCCAGGTCGAACAG | 0.9543648 | 5.041812 | MES-2 | Elly_E13.5 | MES-2 | vcl_AATCCAGGTCGAACAG | VclcKO |
| vcl_CATCAAGTCTGTCAAG | -0.6063948 | -3.524776 | VSMC-8 | Elly_E13.5 | VSMC-8 | vcl_CATCAAGTCTGTCAAG | VclcKO |
| vcl_CAGAGAGCAGCTGGCT | -2.3134097 | -1.675376 | VSMC-6 | Elly_E13.5 | VSMC-6 | vcl_CAGAGAGCAGCTGGCT | VclcKO |
| vcl_ACGGGTCTCAACACGT | -3.7033431 | -3.059830 | VSMC-6 | Elly_E13.5 | VSMC-6 | vcl_ACGGGTCTCAACACGT | VclcKO |
# tmp.df <- rbind(tmp.df, as.character(all.umap.elly$cluster), all.umap.elly$split)
# rownames(tmp.df) <- c("Total Counts","Uniform","Cluster","Split")
# tmp.df[1:4,1:5]
write.table(all.umap.elly, quote = F,sep = ",",row.names = T,col.names = T, file = "/home/lizhixin/project/scPipeline/SPRING/SPRING_dev/datasets/vcl/cellanno.csv")
spring <- read.csv("coordinates.txt", header = F)
all.umap.elly$SPRING1 <- spring$V2
all.umap.elly$SPRING2 <- spring$V3
head(all.umap.elly)
| UMAP_1 | UMAP_2 | ident | split | cluster | rawName | group | SPRING1 | SPRING2 | |
|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | <fct> | <chr> | <chr> | <dbl> | <dbl> | |
| vcl_CCTTCGACACCAACCG | -4.8072097 | -1.920804 | VSMC-7 | Elly_E13.5 | VSMC-7 | vcl_CCTTCGACACCAACCG | VclcKO | 1017.8566 | 95.23537 |
| vcl_CTCTAATTCATAGCAC | -5.1067607 | -2.119910 | VSMC-7 | Elly_E13.5 | VSMC-7 | vcl_CTCTAATTCATAGCAC | VclcKO | 754.9424 | 349.01373 |
| vcl_AATCCAGGTCGAACAG | 0.9543648 | 5.041812 | MES-2 | Elly_E13.5 | MES-2 | vcl_AATCCAGGTCGAACAG | VclcKO | 1018.6995 | 73.98390 |
| vcl_CATCAAGTCTGTCAAG | -0.6063948 | -3.524776 | VSMC-8 | Elly_E13.5 | VSMC-8 | vcl_CATCAAGTCTGTCAAG | VclcKO | 494.7210 | 669.97602 |
| vcl_CAGAGAGCAGCTGGCT | -2.3134097 | -1.675376 | VSMC-6 | Elly_E13.5 | VSMC-6 | vcl_CAGAGAGCAGCTGGCT | VclcKO | 1032.1202 | 172.69868 |
| vcl_ACGGGTCTCAACACGT | -3.7033431 | -3.059830 | VSMC-6 | Elly_E13.5 | VSMC-6 | vcl_ACGGGTCTCAACACGT | VclcKO | 798.3132 | 514.97958 |
print(load("all.umap.elly.SPRING.Rdata"))
[1] "all.umap.elly" "all.umap.elly.b"
centers <- all.umap.elly %>% dplyr::group_by(cluster) %>% summarize(SPRING1 = median(SPRING1),
SPRING2 = median(SPRING2))
library(ggrepel)
table(all.umap.elly$group)
Control Vcl cKO 1385 4298
all.umap.elly.b <- rbind(subset(all.umap.elly, group=="Control"),
subset(all.umap.elly, group=="Vcl cKO")[sample(1:4298, 1385),])
options(repr.plot.width=8, repr.plot.height=5)
pcag <- ggplot(all.umap.elly.b, aes(x=SPRING1, y=-SPRING2, color=cluster)) +
# facet_grid(cols = vars(variable)) +
facet_wrap( ~ group, ncol=2) + # error in border
geom_point(size=1, alpha=1) +
# geom_density_2d(color='black', size=0.05, alpha=0.15) +
geom_text_repel(data = centers, mapping = aes(label = cluster), size = 4.5, color="black", fontface = "bold") +
# geom_line(data=pc.line1, color='red', size=0.5) +
labs(x = "SPRING Dim-1",y = "SPRING Dim-2", title = "") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(legend.title=element_blank()) +
theme(legend.position = "none") +
theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
theme(strip.placement = "outside", strip.text.x = element_text(face="plain", size = 14), #italic
strip.text.y = element_text(face="plain", size = 11)) +
theme(panel.spacing=unit(.3, "lines"),panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) + #line size
scale_color_manual(values=myColors)
pcag
ggsave(filename = "SPRING.vcl.control.pdf", width = 8, height = 5)
save(all.umap.elly, all.umap.elly.b, file = "/home/lizhixin/project/Data_center/analysis/all.umap.elly.SPRING.Rdata")
getwd()
table(cncc.combined.sub@active.ident, cncc.combined.sub$stage)
E10.5 E11.5 E12.5 E13.5 E14.0 E14.5 E17.5 P1 P7
MES-1 193 1335 637 1039 42 399 63 76 77
MES-2 168 586 401 1219 73 593 211 261 568
MES-3 159 625 622 1554 67 1284 224 205 218
MES-4 84 548 215 330 20 135 21 19 47
VSMC-1 179 399 346 543 75 310 368 294 180
VSMC-2 609 801 850 809 211 221 112 118 66
VSMC-3 108 528 347 791 156 1062 1174 1820 1803
VSMC-4 49 211 214 241 58 301 393 328 179
VSMC-5 103 282 227 375 58 170 162 132 91
VSMC-6 54 175 337 2132 407 193 295 481 349
VSMC-7 48 510 826 1210 227 285 199 224 282
VSMC-8 416 914 736 1089 215 145 14 31 5
Neuron 7 5 40 493 180 382 37 6 7
SWN 80 20 101 349 162 346 126 133 436
MLA 0 17 4 43 3 193 52 34 29
Mural 11 15 19 124 3 73 326 42 30
tmp.df <- cncc.combined.sub@meta.data
tmp.df <- tmp.df[grep("ct2|vcl", tmp.df$cellName),]
head(tmp.df)
| orig.ident | nCount_RNA | nFeature_RNA | stage | cellName | lab | split | percent.mt | integrated_snn_res.0.7 | seurat_clusters | ⋯ | c15_n | c16_n | c17_n | c18_n | c19_n | c20_n | ordered_raw_cluster | cluster_number | ordered_raw_cluster_final | EMBO_anno | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <chr> | <chr> | <chr> | <chr> | <dbl> | <fct> | <fct> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <fct> | <fct> | <fct> | <fct> | |
| vcl_CCTTCGACACCAACCG | vcl | 6016 | 1999 | E13.5 | vcl_CCTTCGACACCAACCG | Elly | Elly_E13.5 | 3.474069 | 5 | 5 | ⋯ | 0.017746340 | -0.3405605 | 0.02470102 | -0.002038496 | -0.13094563 | -0.20288788 | 11 | 12 | 11 | VSMC-7 |
| vcl_CTCTAATTCATAGCAC | vcl | 7489 | 2688 | E13.5 | vcl_CTCTAATTCATAGCAC | Elly | Elly_E13.5 | 3.378288 | 5 | 5 | ⋯ | 0.064430912 | -0.3335513 | 0.07300808 | -0.375260891 | -0.13539847 | -0.07655114 | 11 | 12 | 11 | VSMC-7 |
| vcl_CAGAGAGCAGCGAACA | vcl | 22714 | 5129 | E13.5 | vcl_CAGAGAGCAGCGAACA | Elly | Elly_E13.5 | 3.249097 | 11 | 11 | ⋯ | 0.011794468 | -0.3477211 | 0.15103338 | -0.033179337 | -0.22253936 | 0.20032665 | 14 | 14 | 14 | SWN |
| vcl_GCATGCGTCTGCCCTA | vcl | 53839 | 7200 | E13.5 | vcl_GCATGCGTCTGCCCTA | Elly | Elly_E13.5 | 2.936533 | 14 | 14 | ⋯ | -0.028763796 | -0.2270507 | 1.34551991 | -0.050874703 | 0.03675817 | 0.22297381 | 13 | 13 | 13 | Neuron |
| vcl_AATCCAGGTCGAACAG | vcl | 10290 | 2985 | E13.5 | vcl_AATCCAGGTCGAACAG | Elly | Elly_E13.5 | 4.042760 | 3 | 3 | ⋯ | 0.001014918 | 0.1127407 | -0.11073191 | 0.105957097 | 0.28313209 | 0.01824923 | 2 | 1 | 2 | MES-2 |
| vcl_CATCAAGTCTGTCAAG | vcl | 8142 | 2842 | E13.5 | vcl_CATCAAGTCTGTCAAG | Elly | Elly_E13.5 | 7.590273 | 7 | 7 | ⋯ | 0.045419791 | -0.1312454 | 0.02235031 | -0.229241711 | -0.03896710 | -0.18024788 | 12 | 10 | 12 | VSMC-8 |
write.table(table(tmp.df$EMBO_anno, tmp.df$orig.ident), file = "tmp.table.txt", sep = "\t")
library(dplyr)
countTable <- tmp.df %>% dplyr::group_by(orig.ident) %>% dplyr::count(EMBO_anno) %>% dplyr::mutate(prop = n/sum(n))
subset(countTable, EMBO_anno=="VSMC-3")
| orig.ident | EMBO_anno | n | prop |
|---|---|---|---|
| <chr> | <fct> | <int> | <dbl> |
| ct2 | VSMC-3 | 64 | 0.03975155 |
| vcl | VSMC-3 | 226 | 0.04619787 |
head(countTable)
| orig.ident | EMBO_anno | n | prop |
|---|---|---|---|
| <chr> | <fct> | <int> | <dbl> |
| ct2 | MES-1 | 22 | 0.013664596 |
| ct2 | MES-2 | 69 | 0.042857143 |
| ct2 | MES-3 | 5 | 0.003105590 |
| ct2 | MES-4 | 8 | 0.004968944 |
| ct2 | VSMC-1 | 71 | 0.044099379 |
| ct2 | VSMC-2 | 247 | 0.153416149 |
options(repr.plot.width=3.5, repr.plot.height=6)
library(scales)
g <- ggplot(data=countTable, aes(x=orig.ident, y=prop, fill=EMBO_anno)) +
geom_bar(stat="identity", position="fill", alpha=1) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
labs(x = "",y = "Percentage of Population (%)", title = " ") +
theme(axis.text.x = element_text(face="plain", angle=30, size = 15, color = "black", vjust=0.5),
axis.text.y = element_text(face="plain", size = 15, color = "black"),
axis.title =element_text(size = 15)) +
scale_y_continuous(labels = percent_format(suffix="")) +
# coord_flip() +
# change legend position and size
theme(legend.title=element_blank(), axis.ticks.x=element_blank()) +
# guides(colour = guide_legend(override.aes = list(size=3))) +
#
#geom_text(aes(label = scales::percent(prop, accuracy = 0.1),
# y = text.y, group = stage), position = position_dodge(width = 0.9), vjust = 0.5, size=5) +
scale_fill_manual(values=myColors, guide = guide_legend(reverse=F)) +
scale_x_discrete(labels=c("ct2" = "Control", "vcl" = "Vcl cKO"))
g
ggsave(filename = "Figures/integrated.percentage.pdf", width = 3.5, height = 6)
cluster.list <- list("DE1"=c("VSMC-2","VSMC-8"), "DE2"=c("VSMC-4"), "DE3"=c("MES-2"))
ident.1 = "Vcl cKO"
ident.2 = "Control"
# prepare group
tmp.group <- rep("Control", length.out = length(cncc.combined.sub@active.ident))
names(tmp.group) <- names(cncc.combined.sub@active.ident)
tmp.group[grepl("vcl", names(tmp.group))] <- "Vcl cKO"
tmp.group <- factor(tmp.group, levels = c("Control", "Vcl cKO"))
cncc.combined.sub$group <- tmp.group
table(cncc.combined.sub$group)
Control Vcl cKO 43007 4892
# prepare cluster
cncc.combined.sub$cluster <- as.character(cncc.combined.sub@active.ident)
table(cncc.combined.sub$cluster)
MES-1 MES-2 MES-3 MES-4 MLA Mural Neuron SWN VSMC-1 VSMC-2 VSMC-3 3861 4080 4958 1419 375 643 1157 1753 2694 3797 7789 VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8 1974 1600 4423 3811 3565
seuratObj <- subset(cncc.combined.sub, subset = split=="Elly_E13.5")
table(seuratObj$group, seuratObj$cluster)
MES-1 MES-2 MES-3 MES-4 MLA Mural Neuron SWN VSMC-1 VSMC-2 VSMC-3
Control 22 69 5 8 11 2 139 61 71 247 64
Vcl cKO 37 154 40 33 8 16 273 257 211 464 226
VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8
Control 36 41 392 161 281
Vcl cKO 34 198 1525 773 643
# Vcl.DEGs <- DEG.cluster.list(seuratObj, cluster.list)
DEG.1by1 <- function(seuratObj, ident.1 = "Vcl cKO", ident.2 = "Control", assay = "RNA") {
DEGs <- list()
for (i in unique(seuratObj@active.ident)) {
message(sprintf("identifying DEGs in %s between %s and %s...", i, ident.1, ident.2))
# add log2FC and correlation
cells_1 <- rownames(subset(seuratObj@meta.data, cluster==i & group==ident.2))
cells_2 <- rownames(subset(seuratObj@meta.data, cluster==i & group==ident.1))
if (length(cells_1)<5 | length(cells_2)<5) next
log2fc <- log2(rowMeans(seuratObj@assays$RNA@data[,cells_2])+1) - log2(rowMeans(seuratObj@assays$RNA@data[,cells_1])+1)
corM <- cor(t(as.matrix(seuratObj@assays$RNA@data[,c(cells_1,cells_2)])), c(rep(0, length.out = length(cells_1)),
rep(1, length.out = length(cells_2))
))
corM[is.na(corM)] <- 0
#
tmp.seuratObj <- subset(seuratObj, subset = cluster == i)
tmp.seuratObj@active.ident <- tmp.seuratObj$group
tmp.DEGs <- FindMarkers(tmp.seuratObj, ident.1 = ident.1, ident.2 = ident.2, only.pos = F,
min.pct = 0, min.diff.pct = "-Inf", logfc.threshold = 0, assay = assay)
if(dim(tmp.DEGs)[1]<1) {
message(sprintf("skipping %s, no DEGs found...", i))
next
}
DEGs[[i]] <- add.missing.DEGs(tmp.DEGs, rownames(tmp.seuratObj@assays$RNA@counts))
# fill emplty genes
tmp.empty <- rowSums(DEGs[[i]])==0
DEGs[[i]][tmp.empty,]$p_val <- 1
DEGs[[i]][tmp.empty,]$p_val_adj <- 1
# add gene column
DEGs[[i]]$gene <- rownames(DEGs[[i]])
# write to df
DEGs[[i]]$log2FC <- log2fc[DEGs[[i]]$gene]
DEGs[[i]]$correlation <- as.data.frame(corM)[DEGs[[i]]$gene,]
#
}
return(DEGs)
}
Vcl.DEGs <- DEG.1by1(seuratObj, ident.1 = "Vcl cKO", ident.2 = "Control")
identifying DEGs in VSMC-7 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in SWN between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in Neuron between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in MES-2 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-8 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-6 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in MES-3 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-3 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in MES-4 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-2 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-1 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-4 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in VSMC-5 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in Mural between Vcl cKO and Control... identifying DEGs in MES-1 between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero” identifying DEGs in MLA between Vcl cKO and Control... Warning message in cor(t(as.matrix(seuratObj@assays$RNA@data[, c(cells_1, cells_2)])), : “the standard deviation is zero”
Vcl.DEGs.sig <- list()
for (i in names(Vcl.DEGs)) {
print(i)
Vcl.DEGs.sig[[i]] <- subset(Vcl.DEGs[[i]], p_val<0.05 & abs(log2FC)>0.01 & abs(correlation)>0.15 & log2FC*correlation>0)
print(dim(Vcl.DEGs.sig[[i]]))
}
[1] "VSMC-7" [1] 275 8 [1] "SWN" [1] 599 8 [1] "Neuron" [1] 1133 8 [1] "MES-2" [1] 1338 8 [1] "VSMC-8" [1] 414 8 [1] "VSMC-6" [1] 240 8 [1] "MES-3" [1] 1201 8 [1] "VSMC-3" [1] 844 8 [1] "MES-4" [1] 1194 8 [1] "VSMC-2" [1] 764 8 [1] "VSMC-1" [1] 1023 8 [1] "VSMC-4" [1] 1322 8 [1] "VSMC-5" [1] 898 8 [1] "MES-1" [1] 1113 8 [1] "MLA" [1] 526 8
# save(Vcl.DEGs, Vcl.DEGs.sig, file = "keyRdata/Vcl.DEGs.Rdata")
save(Vcl.DEGs, Vcl.DEGs.sig, file = "keyRdata/Vcl.DEGs.all.clusters.Rdata")
# DEGs[[1]][order(DEGs[[1]]$p_val_adj, decreasing = F),]
# Vcl.DEGs[[1]][order(Vcl.DEGs[[1]]$p_val_adj, decreasing = F),]
print(load("keyRdata/Vcl.DEGs.all.clusters.Rdata"))
[1] "Vcl.DEGs" "Vcl.DEGs.sig"
length(names(Vcl.DEGs))
names(Vcl.DEGs)
head(Vcl.DEGs[[1]])
| p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | gene | log2FC | correlation | |
|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <dbl> | <dbl> | |
| Xkr4 | 0.65015867 | 0.0007014957 | 0.001 | 0.000 | 1 | Xkr4 | 0.0008085966 | 0.01494109 |
| Gm1992 | 1.00000000 | 0.0000000000 | 0.000 | 0.000 | 1 | Gm1992 | 0.0000000000 | 0.00000000 |
| Rp1 | 1.00000000 | 0.0000000000 | 0.000 | 0.000 | 1 | Rp1 | 0.0000000000 | 0.00000000 |
| Sox17 | 1.00000000 | 0.0000000000 | 0.000 | 0.000 | 1 | Sox17 | 0.0000000000 | 0.00000000 |
| Gm37323 | 1.00000000 | 0.0000000000 | 0.000 | 0.000 | 1 | Gm37323 | 0.0000000000 | 0.00000000 |
| Mrpl15 | 0.06888887 | -0.0284593245 | 0.458 | 0.634 | 1 | Mrpl15 | -0.0724652353 | -0.05565887 |
library(xlsx)
DEG.sig <- lapply(Vcl.DEGs, function(x) {
subset(x, p_val<0.05 & abs(log2FC)>0.1)[,c("gene","p_val","p_val_adj","avg_logFC")]
})
lapply(DEG.sig, dim)
out.file <- "cluster.specific.DEGs.xlsx"
tmp.list <- DEG.sig
sample.list <- unique(names(tmp.list))
for (i in sample.list) {
print(i)
tmp.df <- tmp.list[[i]]
# tmp.df <- subset(tmp.df, pvalue<0.05)
print(dim(tmp.df))
if (i==sample.list[1]) {
write.xlsx(tmp.df, file=out.file, sheetName=i, row.names=F)
} else {
write.xlsx(tmp.df, file=out.file, sheetName=i, append=TRUE, row.names=F)
}
}
[1] "VSMC-7" [1] 1262 4 [1] "SWN" [1] 1038 4 [1] "Neuron" [1] 1352 4 [1] "MES-2" [1] 1337 4 [1] "VSMC-8" [1] 1042 4 [1] "VSMC-6" [1] 824 4 [1] "MES-3" [1] 1109 4 [1] "VSMC-3" [1] 1146 4 [1] "MES-4" [1] 811 4 [1] "VSMC-2" [1] 1126 4 [1] "VSMC-1" [1] 1289 4 [1] "VSMC-4" [1] 1102 4 [1] "VSMC-5" [1] 1257 4 [1] "MES-1" [1] 978 4 [1] "MLA" [1] 509 4
print(load("keyRdata/Vcl.DEGs.Rdata"))
[1] "Vcl.DEGs"
tmp.genes <- "Rgs2"
Vcl.DEGs[[1]][tmp.genes,]
Vcl.DEGs[[2]][tmp.genes,]
Vcl.DEGs[[3]][tmp.genes,]
| p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | gene | log2FC | correlation | |
|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <dbl> | <dbl> | |
| Rgs2 | 0.007397128 | -0.02119997 | 0.369 | 0.47 | 1 | Rgs2 | -0.04750919 | -0.03766536 |
| p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | gene | log2FC | correlation | |
|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <dbl> | <dbl> | |
| Rgs2 | 0.02279682 | -0.3424198 | 0.176 | 0.444 | 1 | Rgs2 | -0.2674717 | -0.2715347 |
| p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | gene | log2FC | correlation | |
|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <dbl> | <dbl> | |
| Rgs2 | 0.02447955 | -0.2766056 | 0.201 | 0.348 | 1 | Rgs2 | -0.1746621 | -0.1504044 |
# tmp.genes <- "Lum"
tmp.expr <- cbind(score=seuratObj@assays$RNA@data[tmp.genes,], seuratObj@meta.data[,c("group","cluster")])
table(tmp.expr$group)
Control Vcl cKO 1610 4892
# tmp.expr$group <- plyr::mapvalues(tmp.expr$group, from = c("vcl", "ct2"), to = c("Vcl cKO","Control"))
tmp.expr$group <- factor(tmp.expr$group, levels = c("Control","Vcl cKO"))
table(tmp.expr$group, tmp.expr$cluster)
MES-1 MES-2 MES-3 MES-4 MLA Mural Neuron SWN VSMC-1 VSMC-2 VSMC-3
Control 22 69 5 8 11 2 139 61 71 247 64
Vcl cKO 37 154 40 33 8 16 273 257 211 464 226
VSMC-4 VSMC-5 VSMC-6 VSMC-7 VSMC-8
Control 36 41 392 161 281
Vcl cKO 34 198 1525 773 643
options(repr.plot.width=5, repr.plot.height=4.2)
library(ggpubr)
p <- ggplot(data=tmp.expr, aes(x=cluster, y=score, fill=group)) +
geom_boxplot() +
theme_bw() +
labs(title = tmp.genes,
x = "Cluster", y = "Relative expression", fill = "") +
# geom_jitter(shape=16, position=position_jitter(0.2)) +
theme(axis.text.x = element_text(face="plain", angle=90, size = 16, color = "black", vjust=0.6),
axis.text.y = element_text(size = 10),
axis.title = element_text(size = 16)) +
# stat_compare_means(comparisons = list(c("c1","c2")), label.y = 0.2, label = "p.signif") +
# scale_y_continuous(limits = c(-0.3, 0.25)) +
scale_fill_manual(values=brewer.pal(9,"Paired")[c(2,8)])
p
ggsave(filename = paste("Figures/", tmp.genes, ".pdf", sep = ""), width = 5, height = 4.2, dpi = 800)
source("https://raw.githubusercontent.com/leezx/Toolsets/master/R/Toolsets.R")
source("https://raw.githubusercontent.com/leezx/Toolsets/master/R/Plot.R")
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("org.Mm.eg.db")
# subset(Vcl.DEGs[[2]], p_val<0.05 & p_val_adj<0.05)
tmp.list <- list()
for (i in names(Vcl.DEGs.sig)){
tmp.list[[i]] <- rownames(Vcl.DEGs.sig[[i]])
}
DEGs.ora.out <- ora.go.kegg.clusterProfiler(geneList = tmp.list, organism="mm")
for (i in names(DEGs.ora.out$go_list)){
write.csv(subset(DEGs.ora.out$go_list[[i]]@result, p.adjust<0.05 & qvalue<0.05 & Count>5),
file = paste(i,".csv",sep=""))
}
# write.csv(subset(DEGs.ora.out$go_list$c1@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c1.csv")
# write.csv(subset(DEGs.ora.out$go_list$c2@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c2.csv")
# write.csv(subset(DEGs.ora.out$go_list$c3@result, p.adjust<0.05 & qvalue<0.05 & Count>5), file = "c3.csv")
print(load("keyRdata/E13.5_CNCC_merged_updated.Rdata"))
[1] "seuset" "markers" "all_tsne"
seuset@active.ident <- factor(seuset@active.ident, levels = c("c1","c2","c3","c4","c5","c6"))
options(repr.plot.width=6, repr.plot.height=5)
p1 <- DimPlot(seuset, reduction = "tsne", label = TRUE, repel = TRUE, label.size = 6, cols = brewer.pal(8,"Set2"))
p1
tmp.cluster <- as.character(cncc.combined.sub@active.ident[names(seuset@active.ident)])
names(tmp.cluster) <- names(seuset@active.ident)
tmp.cluster[is.na(tmp.cluster)] <- " "
tmp.cluster[tmp.cluster=="Mural"] <- " "
tmp.cluster[tmp.cluster=="MLA"] <- " "
rev(sort(unique(tmp.cluster)))
tmp.cluster <- factor(tmp.cluster, levels = c('MES-1','MES-2','MES-3','MES-4','VSMC-1','VSMC-2','VSMC-3',
'VSMC-4','VSMC-5','VSMC-6','VSMC-7','VSMC-8','Neuron','SWN',' ')) # 'MLA','Mural'
seuset$integrated_cluster <- tmp.cluster
# seuset$integrated_cluster_full <- cncc.combined$EMBO[names(seuset@active.ident)]
table(seuset$integrated_cluster, seuset@active.ident)
c6 c5 c4 c1 c2 c3
MES-1 13 1 1 40 2 0
MES-2 5 133 58 20 6 1
MES-3 38 0 5 0 0 1
MES-4 4 6 0 30 1 0
VSMC-1 7 47 18 194 3 5
VSMC-2 0 12 130 314 33 179
VSMC-3 122 133 11 17 1 3
VSMC-4 33 20 5 8 0 0
VSMC-5 20 64 42 109 2 2
VSMC-6 1 1013 848 25 1 16
VSMC-7 34 148 740 1 0 3
VSMC-8 0 23 799 61 5 31
Neuron 1 2 7 0 134 222
SWN 0 1 4 0 252 26
7 31 30 8 15 5
options(repr.plot.width=6, repr.plot.height=5)
p2 <- DimPlot(seuset, reduction = "tsne", cols = myColors[1:15],
label = T, repel = TRUE, label.size = 4, pt.size = 0.5, group.by = "integrated_cluster")
p2
options(repr.plot.width=12, repr.plot.height=5)
cowplot::plot_grid(p1,p2,ncol = 2)
ggsave(filename = "Figures/comparison_before_after_integration.tSNE.pdf", width = 12, height = 5, dpi = 800)
cncc.combined.sub.elly <- subset(cncc.combined.sub, subset = cellName %in% colnames(seuset))
cncc.combined.sub.elly$old_cluster <- seuset@active.ident[colnames(cncc.combined.sub.elly)]
cncc.combined.sub.elly$old_cluster <- factor(cncc.combined.sub.elly$old_cluster , levels = c("c1","c2","c3","c4","c5","c6"))
options(repr.plot.width=6, repr.plot.height=5)
p2 <- DimPlot(cncc.combined.sub.elly, reduction = "umap", cols = brewer.pal(8,"Set2"),
label = T, repel = TRUE, label.size = 4, pt.size = 0.5, group.by = "old_cluster")
p2
options(repr.plot.width=12, repr.plot.height=5)
cowplot::plot_grid(p1,p2,ncol = 2)
ggsave(filename = "Figures/comparison_before_after_integration.UMAP.pdf", width = 12, height = 5, dpi = 800)
# seuset <- RunPCA(seuset, features = unique(c(VariableFeatures(object = cncc.combined), cncc.markers$gene)))
options(repr.plot.width=8, repr.plot.height=7)
p <- DimPlot(cncc.combined, reduction = "umap", label = TRUE, repel = TRUE, group.by = "ident", split.by = "predict") +
geom_vline(xintercept = -7) +
geom_hline(yintercept = 5.4)
# p
all.umap <- p$data
dim(seuset)
all.umap.elly <- subset(all.umap, grepl("vcl|ct", rownames(all.umap)))
table(subset(all.umap, grepl("E12.5", rownames(all.umap)))$ident)
CNCC MES VSMC Neuron SWN MLA Mural Unknown 2065 1182 2517 32 109 4 18 289
table(subset(all.umap, grepl("E13.5", rownames(all.umap)))$ident)
CNCC MES VSMC Neuron SWN MLA Mural Unknown 1581 2755 1277 67 37 24 98 201
table(subset(all.umap, grepl("E14.5", rownames(all.umap)))$ident)
CNCC MES VSMC Neuron SWN MLA Mural Unknown 1083 2009 2047 328 382 182 76 598
all.umap.elly$group <- "Control"
all.umap.elly[grepl("vcl", rownames(all.umap.elly)),]$group <- "Vcl cKO"
head(all.umap.elly)
table(all.umap.elly$group)
| UMAP_1 | UMAP_2 | ident | predict | group | |
|---|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | <chr> | |
| vcl_CCTTCGACACCAACCG | -2.612578 | -5.0383182 | VSMC | c5 | Vcl cKO |
| vcl_CTCTAATTCATAGCAC | -2.160392 | -5.5188604 | VSMC | c4 | Vcl cKO |
| vcl_CAGAGAGCAGCGAACA | -12.680368 | -3.0148230 | SWN | c2 | Vcl cKO |
| vcl_AATCCAGGTCGAACAG | 3.421771 | 0.8463258 | MES | c5 | Vcl cKO |
| vcl_CATCAAGTCTGTCAAG | -4.815358 | -2.0471276 | VSMC | c4 | Vcl cKO |
| vcl_CAGAGAGCAGCTGGCT | 4.433164 | 0.1995236 | MES | c5 | Vcl cKO |
Control Vcl cKO 1523 4870
set.seed(49)
all.umap.elly.b <- rbind(subset(all.umap.elly, group=="Control"),
subset(all.umap.elly, group=="Vcl cKO")[sample(1:4870, 1523),]
)
options(repr.plot.width=9, repr.plot.height=5)
g1 <- ggplot(all.umap.elly.b, aes(x=UMAP_1, y=UMAP_2, color=ident)) +
# facet_wrap( ~group , ncol=3) +
facet_grid(cols = vars(group)) +
geom_point(size=0.8, alpha=1) +
geom_density_2d(color='black', size=0.1, alpha=0.2) +
# geom_text(data = facet_centers, mapping = aes(label = merged_cluster), size = 4, color="black") +
labs(x = "UMAP-1",y = "UMAP-2", title = "") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
# change legend
theme(legend.title=element_blank()) + # legend.position = "none",
guides(colour = guide_legend(override.aes = list(size=5))) +
theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
# change strip, 17 and bold
theme(strip.placement = "outside", strip.text.x = element_text(face="bold", size = 17), #italic
strip.text.y = element_text(face="plain", size = 11)) +
theme(panel.spacing=unit(.3, "lines"),
panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) +
# change xy axis and title text size
# axis test: 10
# axis title: 14, with vjust or hjust
theme(axis.text = element_text(face="plain", angle=0, size = 10, color = "black"),
axis.title.y =element_text(size = 14), axis.title.x =element_text(size = 14, vjust=-1)) +
scale_color_manual(values=brewer.pal(8,"Set1"))
g1
table(all.umap.elly.b$group, all.umap.elly.b$ident)
CNCC MES VSMC Neuron SWN MLA Mural Unknown
Control 309 104 927 103 62 8 1 9
Vcl cKO 237 92 1035 71 76 2 4 6
all.umap.elly.b$full_cluster <- cncc.combined$EMBO[rownames(all.umap.elly.b)]
options(repr.plot.width=9, repr.plot.height=5)
g1 <- ggplot(all.umap.elly.b, aes(x=UMAP_1, y=UMAP_2, color=full_cluster)) +
# facet_wrap( ~group , ncol=3) +
facet_grid(cols = vars(group)) +
geom_point(size=0.8, alpha=1) +
geom_density_2d(color='black', size=0.1, alpha=0.2) +
# geom_text(data = facet_centers, mapping = aes(label = merged_cluster), size = 4, color="black") +
labs(x = "UMAP-1",y = "UMAP-2", title = "") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
# change legend
theme(legend.title=element_blank()) + # legend.position = "none",
guides(colour = guide_legend(override.aes = list(size=5))) +
theme(strip.background = element_rect(fill = "gray97", color = NA)) + # strip background color
# change strip, 17 and bold
theme(strip.placement = "outside", strip.text.x = element_text(face="bold", size = 17), #italic
strip.text.y = element_text(face="plain", size = 11)) +
theme(panel.spacing=unit(.3, "lines"),
panel.border = element_rect(color = "black", fill = NA, size = 0.2,colour = "black")) +
# change xy axis and title text size
# axis test: 10
# axis title: 14, with vjust or hjust
theme(axis.text = element_text(face="plain", angle=0, size = 10, color = "black"),
axis.title.y =element_text(size = 14), axis.title.x =element_text(size = 14, vjust=-1)) +
scale_color_manual(values=c(brewer.pal(10,"Set1"), brewer.pal(10,"Set1")))
g1
Warning message in brewer.pal(10, "Set1"): “n too large, allowed maximum for palette Set1 is 9 Returning the palette you asked for with that many colors ” Warning message in brewer.pal(10, "Set1"): “n too large, allowed maximum for palette Set1 is 9 Returning the palette you asked for with that many colors ”
table(all.umap.elly.b$group, all.umap.elly.b$full_cluster)
VSMC-3 MES-3 VSMC-4 MES-2 CNCC-1 CNCC-2 VSMC-1 CNCC-3 VSMC-5 Unknown
Control 484 6 58 96 31 176 255 102 5 9
Vcl cKO 633 8 34 78 18 150 209 69 33 6
VSMC-2 SWN VSMC-7 MES-1 VSMC-6 Neuron Mural MLA
Control 70 62 20 2 35 103 1 8
Vcl cKO 95 76 10 6 21 71 4 2
library(monocle)
head(all.umap.elly.b)
| UMAP_1 | UMAP_2 | ident | predict | group | full_cluster | |
|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <fct> | <chr> | <chr> | <fct> | |
| ct2_AAACCTGCAGTTAACC | 2.7239356 | -6.111257 | VSMC | c6 | Control | VSMC-7 |
| ct2_AAACCTGGTTGACGTT | -4.7029686 | -2.880406 | VSMC | c5 | Control | VSMC-1 |
| ct2_AAACGGGGTTCCACTC | -0.8335619 | -3.484131 | VSMC | c5 | Control | VSMC-3 |
| ct2_AAACGGGTCAACCATG | -2.8774552 | 1.587930 | VSMC | c4 | Control | VSMC-2 |
| ct2_AAACGGGTCCCACTTG | -5.0447192 | 12.960110 | Neuron | c2 | Control | Neuron |
| ct2_AAAGATGCACCAACCG | -2.8675112 | -4.743443 | VSMC | c4 | Control | VSMC-3 |
all_tsne <- subset(all.umap.elly.b, ident %in% c("CNCC","VSMC","MES"))
pd <- new("AnnotatedDataFrame", data = all_tsne)
fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name=rownames(seuset@assays$RNA@counts),
row.names = rownames(seuset@assays$RNA@counts)))
cds <- newCellDataSet(as.matrix(seuset@assays$RNA@counts[,rownames(all_tsne)]),
phenoData = pd,
featureData = fd,
expressionFamily=negbinomial.size())
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)
cds <- detectGenes(cds, min_expr = 0.1)
print(head(fData(cds)))
Removing 168 outliers
gene_short_name num_cells_expressed Xkr4 Xkr4 8 Mrpl15 Mrpl15 1705 Lypla1 Lypla1 1114 Tcea1 Tcea1 1733 Rgs20 Rgs20 32 Atp6v1h Atp6v1h 803
cds <- detectGenes(cds, min_expr = 0.1)
print(head(fData(cds)))
expressed_genes <- row.names(subset(fData(cds), num_cells_expressed >= 10))
table(cds$ident)
gene_short_name num_cells_expressed Xkr4 Xkr4 8 Mrpl15 Mrpl15 1705 Lypla1 Lypla1 1114 Tcea1 Tcea1 1733 Rgs20 Rgs20 32 Atp6v1h Atp6v1h 803
CNCC MES VSMC Neuron SWN MLA Mural Unknown
546 196 1962 0 0 0 0 0
# time-consuming step
diff_test_res <- differentialGeneTest(cds[expressed_genes,], fullModelFormulaStr = "~ident")
# head(diff_test_res)
ordering_genes <- row.names (subset(diff_test_res, qval < 0.01))
ordering_genes <- row.names (subset(diff_test_res, qval < 0.00001 & num_cells_expressed > 50 & num_cells_expressed < 1000))
length(ordering_genes)
head(ordering_genes)
length(ordering_genes)
# # set my genes
# ordering_genes <- unique(markers$gene)
# length(ordering_genes)
cds <- setOrderingFilter(cds, ordering_genes)
tmp <- subset(cncc.markers, cluster %in% c("CNCC","MES","VSMC"))$gene
# VariableFeatures(object = cncc.combined),
# cncc.markers$gene
cds <- setOrderingFilter(cds, unique(tmp))
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')
cds <- orderCells(cds)
table(cds$ident)
CNCC MES VSMC Neuron SWN MLA Mural Unknown
546 196 1962 0 0 0 0 0
options(repr.plot.width=5, repr.plot.height=5)
plot_cell_trajectory(cds, color_by = "ident", show_backbone = F, show_tree = T,
state_number_size = 0, show_branch_points = F) +
scale_color_manual(values = brewer.pal(8,"Set2")[c(1,4,6)])
save(cds, file = "monocle.trajectory.CNCC.MES.VSMC.Rdata")
tmp.df <- Read10X("/home/lizhixin/project/scRNA-seq/rawData/10x/Vcl-YFP-CNCC_report_YFP/outs/filtered_feature_bc_matrix")
# tmp.df <- Read10X("/home/lizhixin/project/scRNA-seq/rawData/10x/Vcl-YFP-CNCC_report/outs/filtered_gene_bc_matrices/mm10/")
dim(tmp.df)
dim(tmp.df)
options(repr.plot.width=4, repr.plot.height=4)
hist(tmp.df["YFP",], breaks = 10, xlab = "YFP expression", main = "Detection of YFP in \nVcl-YFP-CNCC")
3276 / (3276 + 9683)
table(tmp.df["YFP",]>0)
FALSE TRUE 3276 9683
table(tmp.df["YFP",]>0)
FALSE TRUE 4082 7478